## Header
# visualize microsat data 2016-2020 quarantine animals
# last updated 2-1-2022


## prep workspace
  rm(list=ls())
  require(adegenet)
  require(pegas)
  require(hierfstat)
  require(RColorBrewer)
  cur.dir=("C:\\Users\\CGeremia\\Desktop\\Working Directory\\population structure\\")
  setwd(cur.dir)
  rm(cur.dir)
  
## read in tabular genotype data and convert to genid object
  d=read.table("data files\\tabular format.txt",colClasses = "character",header=TRUE)
  d$Year=as.numeric(d$Year)
  #break data to two periods
    hist(d$Year,n=20)
  pop=rep("early",nrow(d))
  pop[d$Year>=2015]="late"
  loci=d[,c(9:ncol(d))]
  df=df2genind(X=loci,sep="/",ind.names=d$ID,ploidy = 2,NA.char="000",pop=pop) #create genid object
  rm(d,loci,pop)
  
## filter missing data
  missingloc=propTyped(df,by="ind") #identify missing data from each individual
  removeind=names(missingloc[which(missingloc<1)])
  df=df[!row.names(df@tab) %in% removeind] #remove individual with missing data
  rm(missingloc,removeind)

  
## summarize data early/late periods
  b= basic.stats(df, diploid = TRUE) #summary of basic genetic vars
  # pca among years
  pca1=dudi.pca(tab(df),scannf=FALSE, scale=FALSE, nf=3)
    percents = pca1$eig/sum(pca1$eig)*100
    barplot(percents, ylab = "Genetic variance explained by eigenvectors (%)",names.arg = round(percents, 1))
      pal=brewer.pal(8,"Accent")
    s.class(pca1$li, pop(df),xax=1,yax=2, col=transp(pal,.6), axesell=FALSE,
          cstar=0, cpoint=3, grid=FALSE)
    title("PCA of YELL bison\naxes 1-2")
    add.scatter.eig(pca1$eig[1:20], 3,1,2)
  # dpca among years 
  grp=find.clusters(df, max.n.clust=5) #choose pcas and clusters interactively
    table(pop(df), grp$grp)
    table.value(table(pop(df), grp$grp), col.lab=paste("inf", 1:6),
              row.lab=paste("ori", 1:6))
  dapc1=dapc(df, grp$grp) #note only id's one cluster
    scatter(dapc1, posi.da="bottomright", bg="white") #would be a scatterplot if more clusters id'd
  # FST among years
  fst=genet.dist(df, method = "WC84") #no sign of structure in early vs late data
    
 
## subset data to current dataset 2015-2021
    df=seppop(df)$late
    b=basic.stats(df, diploid = TRUE)
    smry=summary(df)

    #Hardy-Weinberg
      barplot(smry$Hexp-smry$Hobs, main="Heterozygosity: expected-observed",ylab="Hexp - Hobs")
      mean(smry$Hobs); sd(smry$Hobs); min(smry$Hobs); max(smry$Hobs)
      mean(smry$Hexp); sd(smry$Hexp); min(smry$Hexp); max(smry$Hexp)
      bartlett.test(list(smry$Hexp,smry$Hobs)) #tst of variance
      t.test(smry$Hexp,smry$Hobs,pair=T,var.equal=TRUE,alter="greater") #test of means
      hwt=hw.test(df, B=0)#hwt test per loci
        hwt[which(hwt[,3]<0.05),]#report loci that failed H-W-E
    
    #allelic richness
      barplot(smry$loc.n.all, ylab="Number of alleles",main="Number of alleles per locus")
      ar=allelic.richness(df,min.n=NULL,diploid=TRUE)
          mean(ar$Ar[,1]);sd(ar$Ar[,1])
   
    # Inbreeding coefficeint
      mean(b$Fis[,1]); sd(b$Fis[,1])
  
    # Structure
      #pca
      pca1=dudi.pca(tab(df),scannf=FALSE, scale=FALSE, nf=3)
      percents = pca1$eig/sum(pca1$eig)*100
      barplot(percents, ylab = "Genetic variance explained by eigenvectors (%)",names.arg = round(percents, 1))
      pal=brewer.pal(8,"Accent")
      s.class(pca1$li, pop(df),label=NA)
      title("PCA of Bison\naxes 1-2")
      add.scatter.eig(pca1$eig[1:20], 3,1,2)
      #k-means cluster
      grp=find.clusters(df, max.n.clust=5) #choose pcas and clusters interactively

